* Session set-up: disable output paging and clear matrices and graphs left in memory
set more off
clear matrix
graph drop _all

* Load the raw voting extract
use "Data_temp\bhps_US_vote_extract.dta", clear

*********************************************************************************************************
* Covariates / heterogeneities
* NOTE: the variables from female through age are created in a fixed order on purpose;
*       later code refers to them with positional ranges (england-north_ireland, female-age).
*********************************************************************************************************

* --- Individual characteristics ---
* NOTE(review): a missing sex is coded 0 here (no missing-value guard) - confirm sex is never missing
generate female = sex == 2
label variable female "Female"

* --- Employment ---
* Primary source is employ; where that yields no answer, fall back on jbhas.
* Non-positive codes and missing values are treated as "no answer" in both sources.
generate employed = employ == 1 if employ > 0 & !missing(employ)
replace employed = jbhas == 1 if missing(employed) & jbhas > 0 & !missing(jbhas)
label variable employed "Employed"

* --- Region dummies (derived from the region code) ---
generate england = inrange(region, 1, 16)
generate scotland = region == 18
generate wales = region == 17
generate north_ireland = region == 19
tab1 england-north_ireland

* --- Birth date as a Stata monthly date; observations without one are dropped ---
generate birth_date = ym(birthy, birthm)
format birth_date %tm
label variable birth_date "Birth date (month and year)"
drop if missing(birth_date)

* --- Interview date as a Stata monthly date: doim first, intm as fallback ---
generate interview_date = ym(year, doim)
replace interview_date = ym(year, intm) if missing(interview_date)
format interview_date %tm
label variable interview_date "Interview date (month and year)"

* --- Age in years, rounded to one decimal; falls back on year differences
*     when no monthly interview date is available ---
generate age = round((interview_date - birth_date) / 12, .1)
replace age = year - birthy if missing(age)
label variable age "Age at interview in years"

* --- Calendar month of birth (1-12) recovered from the monthly birth date ---
generate birth_month = month(dofm(birth_date))
label variable birth_month "Calendar month of birth"


*************************************************************************************************************
* Outcomes
* Voting outcomes
***********************
**VOTING  AT ELECTIONS*
***********************
* Election dates, stored as Stata monthly dates: ym(year, month)
* NOTE(review): only one 1974 date (October) is coded here - confirm this is intended
gen election1966=ym(1966,3)
gen election1970=ym(1970,6)
gen election1974=ym(1974,10) 
gen election1979=ym(1979,5)
gen election1983=ym(1983,6)
gen election1987=ym(1987,6)
gen election1992=ym(1992,4)
gen election1997=ym(1997,5)
gen election2001=ym(2001,6)
gen election2005=ym(2005,5)
gen election2010=ym(2010,5)
gen election2015=ym(2015,5)
gen election2017=ym(2017,6)

* Display every election date as a monthly date.
* 1966 is included so that election1966 is formatted like all the others.
foreach num of numlist 1966 1970 1974 1979 1983 1987 1992 1997 2001 2005 2010 2015 2017	{	
	format election`num' %tm
	}
	
// recode missings: the listed vote7 codes are set to system missing
recode vote7 (-9 -8 -7 7 =.)

* Only use surveys between two elections to derive answers to a specific election:
* vote7 is attributed to election E when the interview month lies in
* [election E, next election). For 2017 there is no later election in the list,
* so any non-missing interview date from June 2017 onwards qualifies.
gen tmp_vote1992=vote7 if (vote7<.) & (election1992<=interview_date & election1997>interview_date)
gen tmp_vote1997=vote7 if (vote7<.) & (election1997<=interview_date & election2001>interview_date)
gen tmp_vote2001=vote7 if (vote7<.) & (election2001<=interview_date & election2005>interview_date)
gen tmp_vote2005=vote7 if (vote7<.) & (election2005<=interview_date & election2010>interview_date)
gen tmp_vote2010=vote7 if (vote7<.) & (election2010<=interview_date & election2015>interview_date)
gen tmp_vote2015=vote7 if (vote7<.) & (election2015<=interview_date & election2017>interview_date)
gen tmp_vote2017=vote7 if (vote7<.) & (election2017<=interview_date & interview_date<.)
	
****************************************************************************************

* Discard answers given more than 24 months after the election they refer to,
* then give the variable its final name voted<year>.
* The two -sum- calls show the distribution before and after the restriction.
foreach e in 1992 1997 2001 2005 2010 2015 2017 {
	sum tmp_vote`e'
	replace tmp_vote`e'=. if (interview_date-election`e'>24)
	ren tmp_vote`e' voted`e'
	sum voted`e'
	}
	
	
**************************************************************************************************************	
* Edit outcomes for political involvement
*
* Stata treats missing values as larger than any number, so a condition such as
* "x>0" on its own is also true when x is missing. Every positivity filter below
* therefore carries an explicit "& x<." so that missing answers stay missing
* instead of being coded 0.

* Party support: 1 if vote1==1, 0 for any other positive code
gen party_dummy=(vote1==1) if vote1>0 & vote1<.
lab var party_dummy "Support a party"

* Closer to one party: 1 if vote2==1, 0 for any other positive code
gen party_closer=(vote2==1) if vote2>0 & vote2<.
lab var party_closer "Closer to one political party than other (if not supporting)"

* Combined indicator: supports a party OR feels closer to one
gen party_closer1=party_dummy
replace party_closer1=1 if party_closer==1 
lab var party_closer1 "Closer to one political party"


* Party supported (codes 1-3 of vote only).
* NOTE(review): the value label defines 4 "Other", but no observation is assigned 4 here.
gen 	party_support=1 if vote==1
replace party_support=2 if vote==2
replace party_support=3 if vote==3
lab var party_support "Supporting a political party"
label define party 1 "Tories" 2 "Labour" 3 "LibDems" 4 "Other"
lab val party_support party
tab party_support

* Would vote for a party tomorrow: 0 if vote3==95, 1 for other positive codes;
* code 96, non-positive codes and missing values are left missing.
gen vote_party=(vote3!=95) if vote3>0 & vote3!=96 & vote3<.
tab vote3 vote_party
* The label belongs to the derived dummy, not to the raw vote3 variable
lab var vote_party "States a party to vote for tomorrow"

* Voting as a norm: reverse the 1-5 scale so that higher = stronger agreement
gen vote_norm=(6-votenorm) if inrange(votenorm,1,5)
lab var vote_norm "Voting as a social norm (1 strongly disagree to 5 strongly agree)"

* Agreement dummy: 1 if the raw answer is 1 or 2, defined only where vote_norm is valid
gen vote_agree=(votenorm<3) if vote_norm<. & vote_norm>0
lab var vote_agree "Agree that voting is a social norm"

* Political interest: reverse vote6 so that higher = more interested
gen politic_interest=5-vote6 if vote6>0 & vote6<.
lab var politic_interest "Interest in politics (1-4)"
lab def politi 1 "Not at all" 2 "Not very" 3 "Fairly interested" 4 "Very interested"
* Attach the value label using the full variable name (no reliance on abbreviation)
lab val politic_interest politi

* Dummy for the two highest interest categories
gen interest_polit=(politic_interest>=3) if politic_interest<.
lab var interest_polit "Interested or very interested in politics"

* standardise: z-score each component using its full-sample mean and sd
sum party_closer1
gen sd_closer=(party_closer1-r(mean))/(r(sd))

sum politic_interest
gen sd_politi=(politic_interest-r(mean))/(r(sd))

* Reversed voting-norm item, defined for every positive votenorm code
gen h_agree=(6-votenorm) if votenorm>0
sum h_agree
gen sd_agree=(h_agree-r(mean))/r(sd)

sum vote_party
gen sd_party=(vote_party-r(mean))/r(sd)

sum member_party
gen sd_member=(member_party-r(mean))/r(sd)

* joint indices
* Average of the standardised components over whichever of them are non-missing.
* The components are listed explicitly so that an unrelated variable whose name
* happens to start with "sd" can never enter the index.
local sd_items sd_closer sd_politi sd_agree sd_party sd_member
egen h1_index=rownonmiss(`sd_items') // # non-missing outcomes
egen h2_index=rowtotal(`sd_items') if h1_index!=0
replace h2_index=h2_index/h1_index
* Re-standardise the average so the final index has mean 0 and sd 1 in this sample
sum h2_index
gen index=(h2_index-r(mean))/r(sd) 
lab var index "Index of political involvement"

* Remove the helper variables (named explicitly rather than by wildcard)
drop h_agree h1_index h2_index


***************************************************************************************
* Intermediate save: restrict to ages 16-55 and to the variables used downstream
***************************************************************************************

* Sample restriction (observations with missing age are removed as well)
drop if !inrange(age, 16, 55)

* Variable selection.
*   female-age : positional range covering every covariate created between
*                female and age (inclusive)
*   election*  : election dates
*   vote*      : raw and derived voting variables, including voted<year>
keep pno pidp wave year ///
	female-age birth_month age_dv ///
	region urban hhtype hhsize ///
	born_abroad white_brit mum_brit mum_lowedu mwork14 ///
	married single lprnt ///
	low_edu still_school in_education ///
	election* vote* ///
	member_party party_closer1 interest_polit index

* Identifiers first
order pidp wave year, first

save "Data_temp\BHPS_US_edit_mid.dta", replace


***************************************************************************************
* Turnout data set: sample, relative ages and election indicator
***************************************************************************************

use "Data_temp\BHPS_US_edit_mid.dta", clear

* Sample: drop observations that carry no turnout answer for any election
drop if missing(voted1992) & missing(voted1997) & missing(voted2001) & missing(voted2005) ///
	& missing(voted2010) & missing(voted2015) & missing(voted2017)

* Age in months relative to turning 18 (18*12 months) at each election
foreach yr in 1966 1970 1974 1979 1983 1987 1992 1997 2001 2005 2010 2015 2017 {
	generate age`yr' = election`yr' - birth_date - 12*18
}

********************************************************************
* Express everything relative to the most recent election
* Turnout indicator - voted in last general election
tabulate vote7

generate vote_missing = vote7 == -2
label variable vote_missing "Refusal for voting"

generate voted = vote7 == 1 if vote7 > 0
label variable voted "Voted in last general election"

* Election indicator: the election whose cycle contains the interview month,
* i.e. election E <= interview month < following election; set only when vote7 is non-missing
generate election = .
local cycle 1992 1997 2001 2005 2010 2015 2017
forvalues i = 1/6 {
	local cur : word `i' of `cycle'
	local j = `i' + 1
	local nxt : word `j' of `cycle'
	replace election = `cur' if interview_date >= election`cur' & interview_date < election`nxt' & vote7 < .
}
* Last election in the list: open-ended upper bound, interview date must be non-missing
replace election = 2017 if interview_date >= election2017 & interview_date < . & vote7 < .

	
* Relative age at the reference election (age0) and at the 1st to 6th election before it
* (age1-age6). Electoral cycles differ in length, so lags are looked up by position
* in the chronological list of elections rather than computed from years.
forvalues y = 0/6 {
	generate age`y' = .
	label variable age`y' "Relative age at `y' election ago"
}

* Chronological list of elections; positions 7-13 (1992-2017) are the reference elections.
* For reference election number p, lag y points to the election at position p - y.
* 1974 keeps its position in the list but is never used as a source, so the
* corresponding age`y' is left missing.
* NOTE(review): confirm that leaving 1974 unassigned is intended.
local ballots 1966 1970 1974 1979 1983 1987 1992 1997 2001 2005 2010 2015 2017
forvalues p = 7/13 {
	local ref : word `p' of `ballots'
	forvalues y = 0/6 {
		local k = `p' - `y'
		local src : word `k' of `ballots'
		if `src' != 1974 {
			replace age`y' = age`src' if election == `ref'
		}
	}
}


save "Data_temp/turnout.dta", replace



********************************************************************************************************
* Editing for political involvement
********************************************************************************************************

use "Data_temp\BHPS_US_edit_mid.dta", clear

* Age in months relative to turning 18 (12*18 months) at each election;
* negative values mean the person was younger than 18 in the election month
foreach x in 1966 1970 1974 1979 1983 1987 1992 1997 2001 2005 2010 2015 2017 {
	gen age`x'=(election`x'-birth_date)-12*18
	}

* Election indicator: the most recent election at or before the interview month
* (no restriction on vote7 in this data set)
gen election=.
replace election=1992	if (interview_date>=election1992 & interview_date<election1997) 
replace election=1997 	if (interview_date>=election1997 & interview_date<election2001) 
replace election=2001	if (interview_date>=election2001 & interview_date<election2005) 
replace election=2005 	if (interview_date>=election2005 & interview_date<election2010) 
replace election=2010 	if (interview_date>=election2010 & interview_date<election2015) 
replace election=2015 	if (interview_date>=election2015 & interview_date<election2017) 
replace election=2017 	if (interview_date>=election2017 & interview_date<.) 	

* age0-age3: relative age at the reference election and the three before it
* age100   : relative age at the NEXT election
foreach y of numlist 0/3 100 {	
	gen age`y'=.
	lab var age`y' "Relative age at `y' election ago"
	}
lab var age100 "Relative age for upcoming election"	
	
* 100 for before election as negative numbers not possible in variable name	
* Each reference election is paired with the election that follows it.
* 2017 has no following election in the list, so age100 stays missing there.
replace age100=age2017 if election==2015
replace age100=age2015 if election==2010
replace age100=age2010 if election==2005
* The reference election preceding 2005 is 2001; "election" never takes the value 2002
replace age100=age2005 if election==2001
replace age100=age2001 if election==1997
replace age100=age1997 if election==1992
	
* after elections
* age0: relative age at the reference election itself
replace age0=age2017 if election==2017
replace age0=age2015 if election==2015
replace age0=age2010 if election==2010
replace age0=age2005 if election==2005
replace age0=age2001 if election==2001
replace age0=age1997 if election==1997
replace age0=age1992 if election==1992

* age1: relative age one election before the reference election
replace age1=age2015 if election==2017	
replace age1=age2010 if election==2015
replace age1=age2005 if election==2010
replace age1=age2001 if election==2005
replace age1=age1997 if election==2001
replace age1=age1992 if election==1997
replace age1=age1987 if election==1992

* age2: relative age two elections before the reference election
replace age2=age2010 if election==2017
replace age2=age2005 if election==2015
replace age2=age2001 if election==2010
replace age2=age1997 if election==2005
replace age2=age1992 if election==2001
replace age2=age1987 if election==1997
replace age2=age1983 if election==1992

* age3: relative age three elections before the reference election
replace age3=age2005 if election==2017
replace age3=age2001 if election==2015
replace age3=age1997 if election==2010
replace age3=age1992 if election==2005
replace age3=age1987 if election==2001
replace age3=age1983 if election==1997
replace age3=age1979 if election==1992

* Assign each person to the election at which they were within two years of turning 18
* (relative age between -24 and +23 months), and store that relative age.
generate elec_cut = .
label variable elec_cut "Election of cut-off"
generate rel_age = .
label variable rel_age "Relative age in months at election cut-off"

* Elections are processed in chronological order, so when the window fits more
* than one election the latest one wins. The per-election helper variables
* rel_<year> remain in the data set.
foreach yr in 1979 1983 1987 1992 1997 2001 2005 2010 2015 2017 {
	generate rel_`yr' = election`yr' - birth_date - 18*12
	replace elec_cut = `yr' if inrange(rel_`yr', -24, 23)
	replace rel_age = rel_`yr' if elec_cut == `yr'
}

* Keep only people who fall inside the window of some election
drop if missing(rel_age)
tabulate year elec_cut

* Interview year relative to the cut-off election.
* Every remaining observation has a non-missing elec_cut, so one expression covers all of them.
generate rel_year = year - elec_cut
label variable rel_year "Year relative to election of cut-off"

	


*****************************************************************************
* For pooled estimates: create duplicates for observations appearing in several regressions
*
* An observation whose relative age falls inside the window of two adjacent
* lagged elections is duplicated with -expand-; one copy then keeps only one
* of the two relative ages and the other copy keeps the other one.
* The steps below depend on their exact order - do not rearrange.
* NOTE(review): the window bounds differ between conditions
*               (-48,47 / -24,24 / -48,48) - confirm this is intended.

* Step 1: observations inside both the age1 window and the age2 window
gen m1=1 if inrange(age1,-48,47) & inrange(age2,-24,24)		
tab m1
* dupl12==1 marks the newly created copy, dupl12==0 every pre-existing row
expand 2 if m1==1, gen(dupl12)
replace age2=. if dupl12==1 // ignore those from duplicate observations
* Applies to ALL rows with dupl12==0 and age2 in the window, not only to expanded ones
replace age1=. if dupl12==0 & inrange(age2,-24,24) //

* Step 2: same logic for age2 / age3, applied to the data as left by step 1
gen m2=1 if inrange(age2,-24,24) & inrange(age3,-24,24)		
expand 2 if m2==1, gen(dupl23)
replace age2=. if dupl23==1
replace age3=. if dupl23==0 & inrange(age2,-24,24) 
drop m?

* Pooled running variable: the first of age1, age2, age3 (in that order)
* that lies within [-48,48]
gen rel_age2=.
forvalues x = 1/3 {
		replace rel_age2=age`x' if inrange(age`x',-48,48) & rel_age2==.
		}

save "Data_temp/polit_involv.dta", replace

